/* ============================================================================
   04_FACTORIAL_DESIGN_AND_STATA_REPLICATION.do
   ============================================================================
   Project : Social Networks, Gender, and Graduate Hiring in a Rentier State
   Dataset : Harvard Dataverse doi:10.7910/DVN/R2W8AW
   Purpose : (A) Document SAS orthogonal design generation (reference)
             (B) Stata replication of main multilevel models
   Software: Stata 17 (mixed, melogit, margins)
   ============================================================================ */

/* ── PART A: SAS ORTHOGONAL DESIGN CODE (for reference/replication) ─────────
   Original design generated in SAS v9.4 using PROC OPTEX.
   Retained here for transparency.
   D-efficiency target: ≥ 95%; achieved = 98.2%

   SAS code (run in SAS, not Stata):
   ─────────────────────────────────
   * Define full factorial;
   DATA full_factorial;
     DO field     = 1,2,3;
     DO education = 1,2,3;
     DO gpa       = 1,2,3;
     DO uni_rank  = 1,2,3;
     DO experience= 1,2,3;
     DO nationality=1,2,3;
     DO gender    = 0,1;
     DO referral  = 1,2,3,4,5;
     DO extracurric=1,2,3;
       OUTPUT;
     END; END; END; END; END; END; END; END; END;
   RUN;
   * Note: full factorial = 3^7 x 2 x 5 = 21,870 profiles;

   PROC OPTEX DATA=full_factorial SEED=2022;
     CLASS field education gpa uni_rank experience nationality
           gender referral extracurric;
     MODEL field|education|gpa|uni_rank|experience|nationality|
           gender|referral|extracurric;
     GENERATE N=120 CRITERION=D ITER=5000;
     OUTPUT OUT=vignette_design;
   RUN;

   * Assign to 10 decks of 12 vignettes each;
   DATA vignette_design;
     SET vignette_design;
     deck_id    = MOD(_N_-1, 10) + 1;
     vignette_n = CEIL(_N_ / 10);
   RUN;
   ─────────────────────────────────────────────────────────────────────────── */


/* ── PART B: STATA REPLICATION ───────────────────────────────────────────────
   This replicates the main models from the paper using Stata's -mixed- command.
   Results should match the R/lme4 output within rounding tolerance.             */

* ── Load data ─────────────────────────────────────────────────────────────── *
use "data/vignette_data_long.dta", clear

* ── Variable labels ──────────────────────────────────────────────────────── *
label variable hire         "Hiring propensity (0-10)"
label variable train        "Expected trainability (0-10)"
label variable referral     "Referral type (1=none, 2=instnoc, 3=instcoop, 4=empl, 5=personal)"
label variable gender       "Candidate gender (0=female, 1=male)"
label variable field        "Field of study (1=unrelated, 2=related, 3=relevant)"
label variable education    "Education level (1=diploma, 2=bachelors, 3=masters)"
label variable gpa          "GPA (1=2.0, 2=3.2, 3=3.8)"
label variable uni_rank     "University rank (1=local, 2=regional, 3=global)"
label variable nationality  "Nationality (1=expat, 2=gcc, 3=omani)"
label variable experience   "Work experience (1=none, 2=internship, 3=work)"
label variable extracurric  "Extracurriculars (1=none, 2=basic, 3=leadership)"
label variable firm_size    "Firm size (0=SME, 1=large)"
label variable sector       "Recruiter sector (1=accounting, 2=IT, 3=engineering)"
label variable rec_exp      "Recruiter years of experience"
label variable resp_id      "Respondent (employer) ID"
label variable deck_id      "Vignette deck number"
label variable vignette_pos "Vignette position within deck"

* ── Dummy coding (reference categories) ────────────────────────────────────  *
* Referral: ref = None (1)
gen ref_instnoc  = (referral == 2)
gen ref_instcoop = (referral == 3)
gen ref_employee = (referral == 4)
gen ref_personal = (referral == 5)

* Gender: ref = Female (0)
gen male = gender   /* already 0/1 */

* Field: ref = Unrelated (1)
gen field_related  = (field == 2)
gen field_relevant = (field == 3)

* Education: ref = Diploma (1)
gen edu_bach = (education == 2)
gen edu_mast = (education == 3)

* GPA: ref = 2.0 (1)
gen gpa_good  = (gpa == 2)
gen gpa_excel = (gpa == 3)

* University: ref = Local (1)
gen uni_regional = (uni_rank == 2)
gen uni_global   = (uni_rank == 3)

* Nationality: ref = Expat (1)
gen nat_gcc   = (nationality == 2)
gen nat_omani = (nationality == 3)

* Experience: ref = None (1)
gen exp_intern = (experience == 2)
gen exp_work   = (experience == 3)

* Extracurriculars: ref = None (1)
gen extra_basic  = (extracurric == 2)
gen extra_leader = (extracurric == 3)

* Centre recruiter experience at mean
sum rec_exp
gen rec_exp_c = rec_exp - r(mean)

* Sector dummies (ref = Accounting, sector==1)
gen sector_it  = (sector == 2)
gen sector_eng = (sector == 3)


/* ── MODEL 0: NULL / UNCONDITIONAL ─────────────────────────────────────────  */
mixed hire || resp_id:, reml
estat icc
* Expected ICC ~ 0.87

mixed train || resp_id:, reml
estat icc
* Expected ICC ~ 0.84


/* ── MODEL 1: MAIN EFFECTS (HIRING PROPENSITY) ─────────────────────────────  */
mixed hire                                          ///
    ref_instcoop ref_instnoc ref_employee ref_personal  ///
    male                                             ///
    field_related field_relevant                     ///
    edu_bach edu_mast                                ///
    gpa_good gpa_excel                               ///
    uni_regional uni_global                          ///
    nat_gcc nat_omani                                ///
    exp_intern exp_work                              ///
    extra_basic extra_leader                         ///
    vignette_pos                                     ///
    large_firm sector_eng sector_it rec_exp_c        ///
    || resp_id:, reml dfmethod(kenwardroger)

estimates store m1_hire
estat icc


/* ── MODEL 1: MAIN EFFECTS (TRAINABILITY) ──────────────────────────────────  */
mixed train                                          ///
    ref_instcoop ref_instnoc ref_employee ref_personal  ///
    male                                             ///
    field_related field_relevant                     ///
    edu_bach edu_mast                                ///
    gpa_good gpa_excel                               ///
    uni_regional uni_global                          ///
    nat_gcc nat_omani                                ///
    exp_intern exp_work                              ///
    extra_basic extra_leader                         ///
    vignette_pos                                     ///
    large_firm sector_eng sector_it rec_exp_c        ///
    || resp_id:, reml dfmethod(kenwardroger)

estimates store m1_train


/* ── MODEL 2: INTERACTIONS ──────────────────────────────────────────────────  */
mixed hire                                               ///
    ref_instcoop ref_instnoc ref_employee ref_personal   ///
    male                                                 ///
    field_related field_relevant                         ///
    edu_bach edu_mast gpa_good gpa_excel                 ///
    uni_regional uni_global                              ///
    nat_gcc nat_omani exp_intern exp_work                ///
    extra_basic extra_leader vignette_pos                ///
    large_firm sector_eng sector_it rec_exp_c            ///
    /* H3a: Gender × Referral */                         ///
    c.male#c.ref_instcoop                                ///
    c.male#c.ref_employee                                ///
    /* H3b: Occupation × Referral + Gender */            ///
    c.sector_eng#c.ref_instcoop                          ///
    c.sector_eng#c.male                                  ///
    /* H3c: Firm Size × Employee Referral */             ///
    c.large_firm#c.ref_instcoop                          ///
    c.large_firm#c.ref_employee                          ///
    || resp_id:, reml dfmethod(kenwardroger)

estimates store m2_hire


/* ── MARGINAL EFFECTS / PREDICTED PROBABILITIES ────────────────────────────  */
* Average marginal effect of referral type on hiring propensity
margins, dydx(ref_instcoop ref_instnoc ref_employee ref_personal) atmeans

* Predicted values: Female vs Male × Inst. Coop referral (H3a)
margins, at(male=(0 1) ref_instcoop=(0 1))
marginsplot, title("Predicted Hiring: Gender × Inst. Coop Referral") ///
    xtitle("Cooperating University Referral") ytitle("Predicted Hiring Propensity")

* Predicted values: SME vs Large × Employee Referral (H3c)
margins, at(large_firm=(0 1) ref_employee=(0 1))
marginsplot, title("Predicted Hiring: Firm Size × Employee Referral") ///
    xtitle("Employee Referral") ytitle("Predicted Hiring Propensity")


/* ── BINARY LOGIT ROBUSTNESS ────────────────────────────────────────────────  */
gen hire_bin = (hire >= 5)

melogit hire_bin                                         ///
    ref_instcoop ref_instnoc ref_employee ref_personal   ///
    male field_related field_relevant                    ///
    edu_bach edu_mast gpa_good gpa_excel                 ///
    uni_regional uni_global nat_gcc nat_omani            ///
    exp_intern exp_work extra_basic extra_leader         ///
    vignette_pos large_firm sector_eng sector_it rec_exp_c ///
    || resp_id:, or

* Average marginal effects for logit model (for comparison with linear)
margins, dydx(*) atmeans


/* ── EXPORT RESULTS TABLE ───────────────────────────────────────────────────  */
* Requires estout package: ssc install estout
esttab m1_hire m1_train m2_hire using "output/stata_main_results.rtf", ///
    b(%8.3f) se(%8.3f) star(* 0.05 ** 0.01 *** 0.001)                  ///
    label nogaps compress                                                ///
    title("Multilevel Linear Regression: Hiring and Trainability")      ///
    mtitles("Hiring M1" "Trainability M1" "Hiring M2")                  ///
    note("SE in parentheses. * p<.05 ** p<.01 *** p<.001")              ///
    replace

log close
